from gensim import corpora, models, similarities
import jieba

# Compare a query document against every sentence of a reference document
# using TF-IDF weighted cosine similarity (gensim) on jieba-tokenized text.

# Load the reference document, one sentence ("document") per non-empty line.
# The whole file must NOT be iterated as a single string: iterating a str
# yields single characters, which previously produced ~1170 one-character
# "documents" and made the similarity scores meaningless.
# NOTE(review): both files are opened with the platform default encoding, as
# before; pass ``encoding=...`` explicitly if the script has to be portable.
with open('我国软件工程技术的现状.txt', 'r') as corpus_file:
    sentences = [line for line in map(str.strip, corpus_file) if line]

# The query text is used as one single document.
with open('对软件开发工作进行创新的方法.txt', 'r') as keyword_file:
    keyword = keyword_file.read()

# Tokenize each sentence into a list of words.
texts = [jieba.lcut(sentence) for sentence in sentences]

# Map every distinct token to an integer id.
dictionary = corpora.Dictionary(texts)

# Vocabulary size; used below as the width of the similarity index vectors.
num_feature = len(dictionary.token2id)

# Bag-of-words vector, a list of (token_id, count) pairs, for each sentence.
corpus = [dictionary.doc2bow(text) for text in texts]

# TF-IDF model fitted on the sentence corpus.
tfidf = models.TfidfModel(corpus)

# Bag-of-words vector of the query, built with the same dictionary so that
# its token ids line up with the corpus vectors.
new_vec = dictionary.doc2bow(jieba.lcut(keyword))

# Similarity index over the TF-IDF vectors of all sentences.
index = similarities.SparseMatrixSimilarity(
    tfidf[corpus], num_features=num_feature
)

print('\nTF-IDF模型的稀疏向量集：')
for sentence_vec in tfidf[corpus]:
    print(sentence_vec)

print('\nTF-IDF模型的keyword稀疏向量：')
print(tfidf[new_vec])

print('\n相似度计算：')
# Querying the index yields one similarity score per indexed sentence.
sim = index[tfidf[new_vec]]
for i, score in enumerate(sim, start=1):
    print('第', i, '句话的相似度为：', score)
